/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * ICSSearchAlgorithm.java
 * Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 * 
 */

package weka.classifiers.bayes.net.search.ci;

import java.io.FileReader;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.net.ParentSet;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Utils;

/**
 * <!-- globalinfo-start --> This Bayes Network learning algorithm uses
 * conditional independence tests to find a skeleton, finds V-nodes and applies
 * a set of rules to find the directions of the remaining arrows.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -cardinality &lt;num&gt;
 *  When determining whether an edge exists a search is performed 
 *  for a set Z that separates the nodes. MaxCardinality determines 
 *  the maximum size of the set Z. This greatly influences the 
 *  length of the search. (default 2)
 * </pre>
 * 
 * <pre>
 * -mbc
 *  Applies a Markov Blanket correction to the network structure, 
 *  after a network structure is learned. This ensures that all 
 *  nodes in the network are part of the Markov blanket of the 
 *  classifier node.
 * </pre>
 * 
 * <pre>
 * -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
 *  Score type (BAYES, BDeu, MDL, ENTROPY and AIC)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Remco Bouckaert
 * @version $Revision$
 */
public class ICSSearchAlgorithm extends CISearchAlgorithm {

    /** for serialization */
    static final long serialVersionUID = -2510985917284798576L;

    /**
     * returns the name of the attribute with the given index
     * 
     * @param iAttribute the index of the attribute
     * @return the name of the attribute
     */
    String name(int iAttribute) {
        return m_instances.attribute(iAttribute).name();
    }

    /**
     * returns the number of attributes
     * 
     * @return the number of attributes
     */
    int maxn() {
        return m_instances.numAttributes();
    }

    /** maximum size of separating set **/
    private int m_nMaxCardinality = 2;

    /**
     * sets the cardinality
     * 
     * @param nMaxCardinality the max cardinality
     */
    public void setMaxCardinality(int nMaxCardinality) {
        m_nMaxCardinality = nMaxCardinality;
    }

    /**
     * returns the max cardinality
     * 
     * @return the max cardinality
     */
    public int getMaxCardinality() {
        return m_nMaxCardinality;
    }

    class SeparationSet {

        public int[] m_set;

        /**
         * constructor
         */
        public SeparationSet() {
            m_set = new int[getMaxCardinality() + 1];
        } // c'tor

        public boolean contains(int nItem) {
            for (int iItem = 0; iItem < getMaxCardinality() && m_set[iItem] != -1; iItem++) {
                if (m_set[iItem] == nItem) {
                    return true;
                }
            }
            return false;
        } // contains

    } // class sepset

    /**
     * Search for Bayes network structure using ICS algorithm
     * 
     * @param bayesNet  datastructure to build network structure for
     * @param instances data set to learn from
     * @throws Exception if something goes wrong
     */
    @Override
    protected void search(BayesNet bayesNet, Instances instances) throws Exception {
        // init
        m_BayesNet = bayesNet;
        m_instances = instances;

        boolean edges[][] = new boolean[maxn() + 1][];
        boolean[][] arrows = new boolean[maxn() + 1][];
        SeparationSet[][] sepsets = new SeparationSet[maxn() + 1][];
        for (int iNode = 0; iNode < maxn() + 1; iNode++) {
            edges[iNode] = new boolean[maxn()];
            arrows[iNode] = new boolean[maxn()];
            sepsets[iNode] = new SeparationSet[maxn()];
        }

        calcDependencyGraph(edges, sepsets);
        calcVeeNodes(edges, arrows, sepsets);
        calcArcDirections(edges, arrows);

        // transfrom into BayesNet datastructure
        for (int iNode = 0; iNode < maxn(); iNode++) {
            // clear parent set of AttributeX
            ParentSet oParentSet = m_BayesNet.getParentSet(iNode);
            while (oParentSet.getNrOfParents() > 0) {
                oParentSet.deleteLastParent(m_instances);
            }
            for (int iParent = 0; iParent < maxn(); iParent++) {
                if (arrows[iParent][iNode]) {
                    oParentSet.addParent(iParent, m_instances);
                }
            }
        }
    } // search

    /**
     * CalcDependencyGraph determines the skeleton of the BayesNetwork by starting
     * with a complete graph and removing edges (a--b) if it can find a set Z such
     * that a and b conditionally independent given Z. The set Z is found by trying
     * all possible subsets of nodes adjacent to a and b, first of size 0, then of
     * size 1, etc. up to size m_nMaxCardinality
     * 
     * @param edges   boolean matrix representing the edges
     * @param sepsets set of separating sets
     */
    void calcDependencyGraph(boolean[][] edges, SeparationSet[][] sepsets) {
        /* calc undirected graph a-b iff D(a,S,b) for all S) */
        SeparationSet oSepSet;

        for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
            /* start with complete graph */
            for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
                edges[iNode1][iNode2] = true;
            }
        }
        for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
            edges[iNode1][iNode1] = false;
        }

        for (int iCardinality = 0; iCardinality <= getMaxCardinality(); iCardinality++) {
            for (int iNode1 = 0; iNode1 <= maxn() - 2; iNode1++) {
                for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
                    if (edges[iNode1][iNode2]) {
                        oSepSet = existsSepSet(iNode1, iNode2, iCardinality, edges);
                        if (oSepSet != null) {
                            edges[iNode1][iNode2] = false;
                            edges[iNode2][iNode1] = false;
                            sepsets[iNode1][iNode2] = oSepSet;
                            sepsets[iNode2][iNode1] = oSepSet;
                            // report separating set
                            System.err.print("I(" + name(iNode1) + ", {");
                            for (int iNode3 = 0; iNode3 < iCardinality; iNode3++) {
                                System.err.print(name(oSepSet.m_set[iNode3]) + " ");
                            }
                            System.err.print("} ," + name(iNode2) + ")\n");
                        }
                    }
                }
            }
            // report current state of dependency graph
            System.err.print(iCardinality + " ");
            for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
                System.err.print(name(iNode1) + " ");
            }
            System.err.print('\n');
            for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
                for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
                    if (edges[iNode1][iNode2]) {
                        System.err.print("X ");
                    } else {
                        System.err.print(". ");
                    }
                }
                System.err.print(name(iNode1) + " ");
                System.err.print('\n');
            }
        }
    } /* CalcDependencyGraph */

    /**
     * ExistsSepSet tests if a separating set Z of node a and b exists of given
     * cardiniality exists. The set Z is found by trying all possible subsets of
     * nodes adjacent to both a and b of the requested cardinality.
     * 
     * @param iNode1       index of first node a
     * @param iNode2       index of second node b
     * @param nCardinality size of the separating set Z
     * @param edges
     * @return SeparationSet containing set that separates iNode1 and iNode2 or null
     *         if no such set exists
     */
    SeparationSet existsSepSet(int iNode1, int iNode2, int nCardinality, boolean[][] edges) {
        /* Test if a separating set of node d and e exists of cardiniality k */
        // int iNode1_, iNode2_;
        int iNode3, iZ;
        SeparationSet Z = new SeparationSet();
        Z.m_set[nCardinality] = -1;

        // iNode1_ = iNode1;
        // iNode2_ = iNode2;

        // find first candidate separating set Z
        if (nCardinality > 0) {
            Z.m_set[0] = next(-1, iNode1, iNode2, edges);
            iNode3 = 1;
            while (iNode3 < nCardinality) {
                Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
                iNode3++;
            }
        }

        if (nCardinality > 0) {
            iZ = maxn() - Z.m_set[nCardinality - 1] - 1;
        } else {
            iZ = 0;
        }

        while (iZ >= 0) {
            // check if candidate separating set makes iNode2_ and iNode1_ independent
            if (isConditionalIndependent(iNode2, iNode1, Z.m_set, nCardinality)) {
                return Z;
            }
            // calc next candidate separating set
            if (nCardinality > 0) {
                Z.m_set[nCardinality - 1] = next(Z.m_set[nCardinality - 1], iNode1, iNode2, edges);
            }
            iZ = nCardinality - 1;
            while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                iZ = nCardinality - 1;
                while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                    iZ--;
                }
                if (iZ < 0) {
                    break;
                }
                Z.m_set[iZ] = next(Z.m_set[iZ], iNode1, iNode2, edges);
                for (iNode3 = iZ + 1; iNode3 < nCardinality; iNode3++) {
                    Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
                }
                iZ = nCardinality - 1;
            }
        }

        return null;
    } /* ExistsSepSet */

    /**
     * determine index of node that makes next candidate separating set adjacent to
     * iNode1 and iNode2, but not iNode2 itself
     * 
     * @param x      index of current node
     * @param iNode1 first node
     * @param iNode2 second node (must be larger than iNode1)
     * @param edges  skeleton so far
     * @return int index of next node adjacent to iNode1 after x
     */
    int next(int x, int iNode1, int iNode2, boolean[][] edges) {
        x++;
        while (x < maxn() && (!edges[iNode1][x] || !edges[iNode2][x] || x == iNode2)) {
            x++;
        }
        return x;
    } /* next */

    /**
     * CalcVeeNodes tries to find V-nodes, i.e. nodes a,b,c such that a->c<-b and
     * a-/-b. These nodes are identified by finding nodes a,b,c in the skeleton such
     * that a--c, c--b and a-/-b and furthermore c is not in the set Z that
     * separates a and b
     * 
     * @param edges   skeleton
     * @param arrows  resulting partially directed skeleton after all V-nodes have
     *                been identified
     * @param sepsets separating sets
     */
    void calcVeeNodes(boolean[][] edges, boolean[][] arrows, SeparationSet[][] sepsets) {

        // start with complete empty graph
        for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
            for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
                arrows[iNode1][iNode2] = false;
            }
        }

        for (int iNode1 = 0; iNode1 < maxn() - 1; iNode1++) {
            for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
                if (!edges[iNode1][iNode2]) { /* i nonadj j */
                    for (int iNode3 = 0; iNode3 < maxn(); iNode3++) {
                        if ((iNode3 != iNode1 && iNode3 != iNode2 && edges[iNode1][iNode3] && edges[iNode2][iNode3]) & (!sepsets[iNode1][iNode2].contains(iNode3))) {
                            arrows[iNode1][iNode3] = true; /* add arc i->k */
                            arrows[iNode2][iNode3] = true; /* add arc j->k */
                        }
                    }
                }
            }
        }
    } // CalcVeeNodes

    /**
     * CalcArcDirections assigns directions to edges that remain after V-nodes have
     * been identified. The arcs are directed using the following rules: Rule 1:
     * i->j--k & i-/-k => j->k Rule 2: i->j->k & i--k => i->k Rule 3 m /|\ i | k =>
     * m->j i->j<-k \|/ j
     * 
     * Rule 4 m / \ i---k => i->m & k->m i->j \ / j Rule 5: if no edges are directed
     * then take a random one (first we can find)
     * 
     * @param edges  skeleton
     * @param arrows resulting fully directed DAG
     */
    void calcArcDirections(boolean[][] edges, boolean[][] arrows) {
        /* give direction to remaining arcs */
        int i, j, k, m;
        boolean bFound;

        do {
            bFound = false;

            /* Rule 1: i->j--k & i-/-k => j->k */

            for (i = 0; i < maxn(); i++) {
                for (j = 0; j < maxn(); j++) {
                    if (i != j && arrows[i][j]) {
                        for (k = 0; k < maxn(); k++) {
                            if (i != k && j != k && edges[j][k] && !edges[i][k] && !arrows[j][k] && !arrows[k][j]) {
                                arrows[j][k] = true;
                                bFound = true;
                            }
                        }
                    }
                }
            }

            /* Rule 2: i->j->k & i--k => i->k */

            for (i = 0; i < maxn(); i++) {
                for (j = 0; j < maxn(); j++) {
                    if (i != j && arrows[i][j]) {
                        for (k = 0; k < maxn(); k++) {
                            if (i != k && j != k && edges[i][k] && arrows[j][k] && !arrows[i][k] && !arrows[k][i]) {
                                arrows[i][k] = true;
                                bFound = true;
                            }
                        }
                    }
                }
            }

            /*
             * Rule 3 m /|\ i | k => m->j i->j<-k \|/ j
             */
            for (i = 0; i < maxn(); i++) {
                for (j = 0; j < maxn(); j++) {
                    if (i != j && arrows[i][j]) {
                        for (k = 0; k < maxn(); k++) {
                            if (k != i && k != j && arrows[k][j] && !edges[k][i]) {
                                for (m = 0; m < maxn(); m++) {
                                    if (m != i && m != j && m != k && edges[m][i] && !arrows[m][i] && !arrows[i][m] && edges[m][j] && !arrows[m][j] && !arrows[j][m] && edges[m][k] && !arrows[m][k] && !arrows[k][m]) {
                                        arrows[m][j] = true;
                                        bFound = true;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            /*
             * Rule 4 m / \ i---k => i->m & k->m i->j \ / j
             */
            for (i = 0; i < maxn(); i++) {
                for (j = 0; j < maxn(); j++) {
                    if (i != j && arrows[j][i]) {
                        for (k = 0; k < maxn(); k++) {
                            if (k != i && k != j && edges[k][j] && !arrows[k][j] && !arrows[j][k] && edges[k][i] && !arrows[k][i] && !arrows[i][k]) {
                                for (m = 0; m < maxn(); m++) {
                                    if (m != i && m != j && m != k && edges[m][i] && !arrows[m][i] && !arrows[i][m] && edges[m][k] && !arrows[m][k] && !arrows[k][m]) {
                                        arrows[i][m] = true;
                                        arrows[k][m] = true;
                                        bFound = true;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            /*
             * Rule 5: if no edges are directed then take a random one (first we can find)
             */

            if (!bFound) {
                i = 0;
                while (!bFound && i < maxn()) {
                    j = 0;
                    while (!bFound && j < maxn()) {
                        if (edges[i][j] && !arrows[i][j] && !arrows[j][i]) {
                            arrows[i][j] = true;
                            bFound = true;
                        }
                        j++;
                    }
                    i++;
                }
            }

        } while (bFound);

    } // CalcArcDirections

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();

        result.addElement(new Option("\tWhen determining whether an edge exists a search is performed \n" + "\tfor a set Z that separates the nodes. MaxCardinality determines \n" + "\tthe maximum size of the set Z. This greatly influences the \n" + "\tlength of the search. (default 2)", "cardinality", 1, "-cardinality <num>"));

        result.addAll(Collections.list(super.listOptions()));

        return result.elements();
    } // listOption

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -cardinality &lt;num&gt;
     *  When determining whether an edge exists a search is performed 
     *  for a set Z that separates the nodes. MaxCardinality determines 
     *  the maximum size of the set Z. This greatly influences the 
     *  length of the search. (default 2)
     * </pre>
     * 
     * <pre>
     * -mbc
     *  Applies a Markov Blanket correction to the network structure, 
     *  after a network structure is learned. This ensures that all 
     *  nodes in the network are part of the Markov blanket of the 
     *  classifier node.
     * </pre>
     * 
     * <pre>
     * -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
     *  Score type (BAYES, BDeu, MDL, ENTROPY and AIC)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String tmpStr;

        tmpStr = Utils.getOption("cardinality", options);
        if (tmpStr.length() != 0) {
            setMaxCardinality(Integer.parseInt(tmpStr));
        } else {
            setMaxCardinality(2);
        }

        super.setOptions(options);
    } // setOptions

    /**
     * Gets the current settings of the Classifier.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();

        result.add("-cardinality");
        result.add("" + getMaxCardinality());

        Collections.addAll(result, super.getOptions());

        return result.toArray(new String[result.size()]);
    } // getOptions

    /**
     * @return a string to describe the MaxCardinality option.
     */
    public String maxCardinalityTipText() {
        return "When determining whether an edge exists a search is performed for a set Z " + "that separates the nodes. MaxCardinality determines the maximum size of the set Z. " + "This greatly influences the length of the search. Default value is 2.";
    } // maxCardinalityTipText

    /**
     * This will return a string describing the search algorithm.
     * 
     * @return The string.
     */
    @Override
    public String globalInfo() {
        return "This Bayes Network learning algorithm uses conditional independence tests " + "to find a skeleton, finds V-nodes and applies a set of rules to find the directions " + "of the remaining arrows.";
    }

    /**
     * for testing the class
     * 
     * @param argv the commandline parameters
     */
    static public void main(String[] argv) {
        try {
            BayesNet b = new BayesNet();
            b.setSearchAlgorithm(new ICSSearchAlgorithm());
            Instances instances = new Instances(new FileReader("C:\\eclipse\\workspace\\weka\\data\\contact-lenses.arff"));
            instances.setClassIndex(instances.numAttributes() - 1);
            b.buildClassifier(instances);
            System.out.println(b.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    } // main

} // class ICSSearchAlgorithm
